library(ggplot2)
library(dplyr)
library(reshape2)
library(fst)
library(haven)

# Restore the saved workspace objects; the code below expects this file to
# provide `full_sib`.
load("all_sibs.rdata")

# Keep only rows where run_kv_1993 is not 1 (presumably people who did not
# run in the 1993 local election). Rows with a missing value are dropped as
# well, because filter() only keeps rows where the condition is TRUE.
full_sib <- filter(ungroup(full_sib), run_kv_1993 != 1)

# Per-year bar counts for the local elections are accumulated below.

# Start from empty tibbles
bar_data <- tibble()
# NOTE(review): ttest_data is not filled anywhere in the visible code.
ttest_data <- tibble()

# Local elections: for every election year k, count candidates / winners and
# the other members of their sibships by income ventile, and append the
# counts to `bar_data`.
# Only the first time a person runs is looked at: rows with run_kv_<k> == 1
# are removed from `full_sib` once year k has been processed.
var <- c("run_kv", "elected_kv")
lab <- c("local candidate", "local winner")

for (k in seq(1997, 2013, by = 4)) {
  # Income measure: PERINDKIALT_13 averaged over the three years before the
  # election. Only the two columns that are needed are read from each file.
  inc_lags <- lapply(1:3, function(yrs_back) {
    lagged <- read.fst(
      paste0("ind", k - yrs_back, ".fst"),
      columns = c("PNR", "PERINDKIALT_13")
    )
    names(lagged)[names(lagged) == "PERINDKIALT_13"] <-
      paste0("inc_lag", yrs_back)
    lagged
  })

  # Left joins starting from year k - 1, so only persons present in that
  # file are kept. Years with missing income are skipped in the mean (the
  # mean is NaN when all three are missing).
  inc <-
    inc_lags[[1]] %>%
    left_join(inc_lags[[2]], by = "PNR") %>%
    left_join(inc_lags[[3]], by = "PNR") %>%
    mutate(
      PERINDKIALT_13 = rowMeans(
        cbind(inc_lag1, inc_lag2, inc_lag3),
        na.rm = TRUE
      )
    ) %>%
    select(PNR, PERINDKIALT_13)
  rm(inc_lags)

  bef <-
    read.fst(
      paste0("bef", k, ".fst"),
      columns = c("PNR", "KOEN", "ALDER")
    ) %>%
    select(PNR, KOEN, ALDER)

  # Income ventile (1-20) within each KOEN x ALDER cell
  inc <-
    left_join(inc, bef, by = "PNR") %>%
    group_by(KOEN, ALDER) %>%
    mutate(inc_tile = ntile(PERINDKIALT_13, 20)) %>%
    ungroup()

  for (j in seq_along(var)) {
    outcome_col <- paste0(var[j], "_", k)
    stopifnot(outcome_col %in% names(full_sib))

    # `[[` always yields a plain vector; `full_sib[, name]` returns a
    # one-column tibble when full_sib is a tibble.
    full_sib$quant_year <- full_sib[[outcome_col]]

    # Sibships with at least one member with outcome == 1 in year k.
    # A sibship containing a missing outcome sums to NA and is dropped by
    # the filter.
    sibs_year <-
      full_sib %>%
      group_by(fe_sibship) %>%
      summarise(pol_fam = sum(quant_year)) %>%
      filter(pol_fam > 0)

    sibs_pol <-
      full_sib %>%
      filter(fe_sibship %in% sibs_year$fe_sibship)

    # Attach ventiles; keep only persons that belong to such a sibship
    data_an <-
      left_join(inc, sibs_pol, by = "PNR") %>%
      filter(!is.na(.data[[outcome_col]]))

    # Per ventile: rows with outcome 1 (politicians) and outcome 0 (siblings)
    tile_counts <-
      data_an %>%
      group_by(inc_tile) %>%
      summarise(
        politician_margin = sum(.data[[outcome_col]] == 1),
        sibling = sum(.data[[outcome_col]] == 0)
      )

    # The 1:20 skeleton guarantees one row per ventile; ventiles without any
    # observation come out of the join as NA and are set to 0 here (the
    # NA-to-0 step has to run after the join to have any effect).
    plot_data <-
      tibble(type = lab[j], inc_tile = 1:20, year = k) %>%
      left_join(tile_counts, by = "inc_tile") %>%
      mutate(
        politician_margin = coalesce(politician_margin, 0L),
        sibling = coalesce(sibling, 0L)
      )

    bar_data <- bind_rows(bar_data, plot_data)
  }

  # Drop everyone who ran in year k so later years only see first-time runs
  full_sib <- filter(full_sib, .data[[paste0("run_kv_", k)]] != 1)
  print(k)
  print(Sys.time())
}


# Before the parliament loop: keep only rows where run_fv_1990_FV is not 1
# (rows with a missing value are dropped by filter() as well).
full_sib <-
  full_sib %>%
  ungroup() %>%
  filter(run_fv_1990_FV != 1)

# Empty accumulator. tibble() replaces the deprecated data_frame() and matches
# how the other accumulators in this script are created.
# NOTE(review): the parliament loop below appends to `bar_data`, not to this
# object.
bar_data_fv <- tibble()

# Parliament elections: for every election year k, count candidates / winners
# and the other members of their sibships by income ventile, and append the
# counts to `bar_data`.
# Only the first time a person runs is looked at: rows with
# run_fv_<k>_FV == 1 are removed from `full_sib` once year k is processed.
var <- c("run_fv", "elected_fv")
lab <- c("parliament candidate", "parliament winner")
# The candidacy columns carry an "_FV" suffix, the elected columns do not.
col_suffix <- c("_FV", "")

for (k in c(1994, 1998, 2001, 2005, 2007, 2011, 2015)) {
  # Income measure: PERINDKIALT_13 averaged over the three years before the
  # election. Only the two columns that are needed are read from each file.
  inc_lags <- lapply(1:3, function(yrs_back) {
    lagged <- read.fst(
      paste0("ind", k - yrs_back, ".fst"),
      columns = c("PNR", "PERINDKIALT_13")
    )
    names(lagged)[names(lagged) == "PERINDKIALT_13"] <-
      paste0("inc_lag", yrs_back)
    lagged
  })

  # Left joins starting from year k - 1, so only persons present in that
  # file are kept. Years with missing income are skipped in the mean (the
  # mean is NaN when all three are missing).
  inc <-
    inc_lags[[1]] %>%
    left_join(inc_lags[[2]], by = "PNR") %>%
    left_join(inc_lags[[3]], by = "PNR") %>%
    mutate(
      PERINDKIALT_13 = rowMeans(
        cbind(inc_lag1, inc_lag2, inc_lag3),
        na.rm = TRUE
      )
    ) %>%
    select(PNR, PERINDKIALT_13)
  rm(inc_lags)

  bef <-
    read.fst(
      paste0("bef", k, ".fst"),
      columns = c("PNR", "KOEN", "ALDER")
    ) %>%
    select(PNR, KOEN, ALDER)

  # Income ventile (1-20) within each KOEN x ALDER cell.
  # NOTE(review): an earlier version re-read ind<k>.fst inside the inner loop
  # and overwrote `inc` with unadjusted single-year ventiles, which discarded
  # the table built here. That override is removed so the parliament panels
  # use the same income measure as the local-election loop. Confirm this is
  # the intended definition.
  inc <-
    left_join(inc, bef, by = "PNR") %>%
    group_by(KOEN, ALDER) %>%
    mutate(inc_tile = ntile(PERINDKIALT_13, 20)) %>%
    ungroup()

  for (j in seq_along(var)) {
    outcome_col <- paste0(var[j], "_", k, col_suffix[j])
    stopifnot(outcome_col %in% names(full_sib))

    # `[[` yields a plain vector for both data frames and tibbles
    full_sib$quant_year <- full_sib[[outcome_col]]

    # Sibships with at least one member with outcome == 1 in year k.
    # A sibship containing a missing outcome sums to NA and is dropped by
    # the filter.
    sibs_year <-
      full_sib %>%
      group_by(fe_sibship) %>%
      summarise(pol_fam = sum(quant_year)) %>%
      filter(pol_fam > 0)

    sibs_pol <-
      full_sib %>%
      filter(fe_sibship %in% sibs_year$fe_sibship)

    # Attach ventiles; keep only persons that belong to such a sibship
    data_an <-
      left_join(inc, sibs_pol, by = "PNR") %>%
      filter(!is.na(.data[[outcome_col]]))

    # Per ventile: rows with outcome 1 (politicians) and outcome 0 (siblings)
    tile_counts <-
      data_an %>%
      group_by(inc_tile) %>%
      summarise(
        politician_margin = sum(.data[[outcome_col]] == 1),
        sibling = sum(.data[[outcome_col]] == 0)
      )

    # The 1:20 skeleton guarantees one row per ventile; ventiles without any
    # observation come out of the join as NA and are set to 0 here (the
    # NA-to-0 step has to run after the join to have any effect).
    plot_data <-
      tibble(type = lab[j], inc_tile = 1:20, year = k) %>%
      left_join(tile_counts, by = "inc_tile") %>%
      mutate(
        politician_margin = coalesce(politician_margin, 0L),
        sibling = coalesce(sibling, 0L)
      )

    bar_data <- bind_rows(bar_data, plot_data)
  }

  # Drop everyone who ran in year k so later years only see first-time runs
  full_sib <- filter(full_sib, .data[[paste0("run_fv_", k, "_FV")]] != 1)
  print(k)
  print(Sys.time())
}


# Long format: one row per type x ventile x series (politician_margin /
# sibling), with counts summed over all election years. Missing counts are
# treated as 0.
bar_data2 <-
  melt(as.data.frame(bar_data),
       id = c("type", "inc_tile", "year")) %>%
  mutate(value = ifelse(is.na(value), 0, value)) %>%
  group_by(type, inc_tile, variable) %>%
  summarise(count = sum(value)) %>%
  ungroup()

# Total count per type and series, used as the denominator of the shares
bar_data_count <-
  bar_data2 %>%
  group_by(type, variable) %>%
  summarise(total = sum(count)) %>%
  ungroup()

# Share of each ventile within its type x series
bar_data2 <-
  left_join(bar_data2, bar_data_count, by = c("type", "variable")) %>%
  mutate(share = count / total)

# Facet labels, listed in display order. Labels are looked up by the value of
# `type` rather than assigned by row position, so the result does not depend
# on how the rows happen to be sorted or on every type having exactly 40 rows.
type_labels <- c(
  "local candidate"      = "Running for \nmunicipality",
  "local winner"         = "Elected for \nmunicipality",
  "parliament candidate" = "Running for \nparliament",
  "parliament winner"    = "Elected for \nparliament"
)
stopifnot(all(as.character(bar_data2$type) %in% names(type_labels)))

bar_data2 <-
  bar_data2 %>%
  ungroup() %>%
  mutate(
    type = factor(
      unname(type_labels[as.character(type)]),
      levels = unname(type_labels)
    ),
    variable = ifelse(
      variable == "politician_margin",
      "Politicians",
      "Siblings"
    )
  )

# Build the figure: one panel per type (rows) and series (columns), bars show
# the share of observations in each income ventile.

# Tick labels for ventiles 0, 5, 10, 15, 20 expressed as percentiles
pct_breaks <- seq(0, 20, 5)
pct_labels <- c("0 %", "25 %", "50 %", "75 %", "100 %")

plot <-
  ggplot(bar_data2, aes(x = inc_tile, y = share)) +
  facet_grid(type ~ variable) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_classic() +
  scale_x_continuous(name = "", breaks = pct_breaks, labels = pct_labels) +
  scale_y_continuous(name = "") +
  # dashed reference line at 0.05 = 1 / 20, i.e. an equal share per ventile
  geom_hline(yintercept = 0.05, linetype = "dashed", alpha = 0.5)